I want to sanity test that the mock measurements I'm comparing to are what I expect them to be, and the bad fits I'm recovering are because my models are a poor fit and not some other reason.


In [1]:
%matplotlib inline
from matplotlib import pyplot as plt

In [2]:
import numpy as np
from os import path
from astropy.table import Table
from halotools.mock_observables import hod_from_mock
from halotools.utils import broadcast_host_halo_property, add_halo_hostid

from pearce.mocks.kittens import TrainingBox, MDPL2
import h5py

In [3]:
cat_dir = '/u/ki/swmclau2/des'

In [4]:
sham_catalog = np.load(path.join(cat_dir, 'cut_sham_catalog.npy'))

In [5]:
shuffled_sham_catalog = np.load(path.join(cat_dir, 'cut_shuffled_sham_catalog.npy'))

In [6]:
nfw_sham_catalog = np.load(path.join(cat_dir, 'cut_nfwized_sham_catalog.npy'))

In [7]:
sham_catalog.shape, shuffled_sham_catalog.shape, nfw_sham_catalog.shape


Out[7]:
((500001,), (500000,), (500000,))

In [8]:
catalog_fname = '/u/ki/swmclau2/des/test_MDPL2_halo_vpeak_smf_sham_large.hdf5'
halo_catalog = Table.read(catalog_fname, format = 'hdf5')#, path = 'halo_vpeak_shuffled'))


/u/ki/swmclau2/.conda/envs/hodemulator/lib/python2.7/importlib/__init__.py:37: H5pyDeprecationWarning: The h5py.highlevel module is deprecated, code should import directly from h5py, e.g. 'from h5py import File'.
  __import__(name)
WARNING: path= was not specified but multiple tables are present, reading in first available table (path=halo_vpeak_catalog) [astropy.io.misc.hdf5]
WARNING:astropy:path= was not specified but multiple tables are present, reading in first available table (path=halo_vpeak_catalog)
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-8-849acdb31062> in <module>()
      1 catalog_fname = '/u/ki/swmclau2/des/test_MDPL2_halo_vpeak_smf_sham_large.hdf5'
----> 2 halo_catalog = Table.read(catalog_fname, format = 'hdf5')#, path = 'halo_vpeak_shuffled'))

/u/ki/swmclau2/.local/lib/python2.7/site-packages/astropy/table/table.pyc in read(cls, *args, **kwargs)
   2428         passed through to the underlying data reader (e.g. `~astropy.io.ascii.read`).
   2429         """
-> 2430         return io_registry.read(cls, *args, **kwargs)
   2431 
   2432     def write(self, *args, **kwargs):

/u/ki/swmclau2/.local/lib/python2.7/site-packages/astropy/io/registry.pyc in read(cls, *args, **kwargs)
    466 
    467         reader = get_reader(format, cls)
--> 468         data = reader(*args, **kwargs)
    469 
    470         if not isinstance(data, cls):

/u/ki/swmclau2/.local/lib/python2.7/site-packages/astropy/io/misc/hdf5.pyc in read_table_hdf5(input, path)
    129 
    130         try:
--> 131             return read_table_hdf5(f, path=path)
    132         finally:
    133             f.close()

/u/ki/swmclau2/.local/lib/python2.7/site-packages/astropy/io/misc/hdf5.pyc in read_table_hdf5(input, path)
    110                               " table (path={0})".format(path),
    111                               AstropyUserWarning)
--> 112                 return read_table_hdf5(input, path=path)
    113 
    114     elif not isinstance(input, h5py.highlevel.Dataset):

/u/ki/swmclau2/.local/lib/python2.7/site-packages/astropy/io/misc/hdf5.pyc in read_table_hdf5(input, path)
    138     # Create a Table object
    139     from ...table import Table
--> 140     table = Table(np.array(input))
    141 
    142     # Read the meta-data from the file

MemoryError: 

In [ ]:
len(halo_catalog)
halo_catalog['halo_id'] = halo_catalog['id'] halo_catalog['halo_upid'] = halo_catalog['upid']
#add_halo_hostid(halo_catalog) #broadcast_host_halo_property(halo_catalog, 'halo_mvir', delete_possibly_existing_column=False)

In [ ]:
haloprop_bins = np.logspace(10,16, 61)
hbc = (haloprop_bins[1:] + haloprop_bins[:-1])/2.0

In [ ]:
sham_hod, _ = hod_from_mock( sham_catalog['halo_mvir_host_halo'],halo_catalog['halo_mvir'], haloprop_bins)

In [ ]:
shuffled_sham_hod, _ = hod_from_mock(shuffled_sham_catalog['halo_mvir_host_halo'],halo_catalog['halo_mvir'], haloprop_bins)

In [ ]:
nfw_sham_hod, _ = hod_from_mock(nfw_sham_catalog['halo_mvir_host_halo'],halo_catalog['halo_mvir'], haloprop_bins)

In [ ]:
plt.plot(hbc, sham_hod)
plt.plot(hbc, shuffled_sham_hod)
plt.plot(hbc, nfw_sham_hod)

plt.loglog();

In [ ]:
r_bins = np.logspace(-1, 1.6, 19)
rbc = (r_bins[1:]+r_bins[:-1])/2.0

In [ ]:
sham_wp = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/SHAMmock_wp.npy')
shuffled_sham_wp = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/SHUFFLED_SHAMmock_wp.npy')
nfw_sham_wp = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/NFWIZED_SHAMmock_wp.npy')

In [ ]:
plt.plot(rbc, sham_wp, label ='SHAM')
plt.plot(rbc, shuffled_sham_wp, label = 'Shuffled')
plt.plot(rbc, nfw_sham_wp, label = 'NFW')
plt.loglog();
plt.legend(loc='best')

In [ ]:
plt.plot(rbc, sham_wp/sham_wp, label ='SHAM')
plt.plot(rbc, shuffled_sham_wp/sham_wp, label = 'Shuffled')
plt.plot(rbc, nfw_sham_wp/sham_wp, label = 'NFW')
plt.xscale('log');
plt.legend(loc='best')

In [ ]:
sham_ds = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/SHAMmock_ds.npy')
shuffled_sham_ds = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/SHUFFLED_SHAMmock_ds.npy')
nfw_sham_ds = np.load('/u/ki/swmclau2/Git/pearce/bin/mock_measurements/NFWIZED_SHAMmock_ds.npy')

In [ ]:
plt.plot(rbc, sham_ds, label ='SHAM')
plt.plot(rbc, shuffled_sham_ds, label = 'Shuffled')
plt.plot(rbc, nfw_sham_ds, label = 'NFW')
plt.loglog();
plt.legend(loc='best')

In [ ]:
plt.plot(rbc, sham_ds/sham_ds, label ='SHAM')
plt.plot(rbc, shuffled_sham_ds/sham_ds, label = 'Shuffled')
plt.plot(rbc, nfw_sham_ds/sham_ds, label = 'NFW')
plt.xscale('log');
plt.legend(loc='best')

In [ ]:
mdpl2 = MDPL2()

In [ ]:
cat = TrainingBox(0,system='ki-ls')
cat.load_model(1.0, HOD='abZheng07')

In [ ]:
mdpl2.pmass

In [ ]:
mass_function = np.histogram(halo_catalog[halo_catalog['halo_upid']==-1]['halo_mvir'], haloprop_bins)[0]

In [ ]:
mass_function[hbc<mdpl2.pmass*100] = 0.0

In [ ]:
plt.plot(hbc, mass_function)
plt.loglog();

In [ ]:
def calc_analytic_nd(cat, params, min_ptcl):
    hod = calc_hod(cat, params, hbc)
    return np.sum(mass_function * hod) / ((1000) ** 3)  # /self.h)**3)

In [ ]:
from scipy.optimize import minimize_scalar, curve_fit
def add_logMmin(hod_params, cat):

    hod_params['logMmin'] = 13.0 #initial guess
    #cat.populate(hod_params) #may be overkill, but will ensure params are written everywhere
    def func(logMmin, hod_params):
        hod_params.update({'logMmin':logMmin})
        return (calc_analytic_nd(cat,hod_params, min_ptcl=100) - 5e-4)**2

    res = minimize_scalar(func, bounds = (12.0,14.0),\
                          args = (hod_params,), \
                          options = {'maxiter':100}, method = 'Bounded')

    # assuming this doens't fail
    hod_params['logMmin'] = res.x
    #print res.x,
    #print calc_analytic_nd(cat, hod_params, min_ptcl=100)

In [ ]:
def calc_hod(cat, params, bin_centers):
    cat.model.param_dict.update(params)
    cens_occ, sats_occ = cat.model.model_dictionary['centrals_occupation'], cat.model.model_dictionary[
        'satellites_occupation']
    for key, val in params.iteritems():
        if key in cens_occ.param_dict:
            cens_occ.param_dict[key] = val
        if key in sats_occ.param_dict:
            sats_occ.param_dict[key] = val

    cen_hod = getattr(cens_occ, "baseline_mean_occupation", cens_occ.mean_occupation)(prim_haloprop=bin_centers)

    sat_hod = getattr(sats_occ, "baseline_mean_occupation", sats_occ.mean_occupation)(prim_haloprop=bin_centers)

    return cen_hod, sat_hod

In [ ]:
chain_fname = '/u/ki/swmclau2/des/PearceMCMC/NFWizedSHAM_fixed_cosmo_wp_ds_rmin_None_CAB.hdf5'

In [ ]:
f = h5py.File(chain_fname, 'r')

In [ ]:
tf = f.attrs['training_file']

In [ ]:
n_walkers = f.attrs['nwalkers']
chain_pnames = f.attrs['param_names']
n_burn = 10000

In [ ]:
chain = f['chain'][:n_burn*n_walkers]

In [ ]:
chain.shape

In [ ]:
f.close()

In [ ]:
downsample_npoints = 100
downsample_idxs = np.random.choice(chain.shape[0], downsample_npoints, replace=False)

In [ ]:
downsample_hod_chain = chain[downsample_idxs, 7:]

In [ ]:
hod_chain_pnames = chain_pnames[7:]

In [ ]:
cen_mask = sham_catalog['halo_upid'] == -1
sham_cen_hod, _ = hod_from_mock( sham_catalog['halo_mvir_host_halo'][cen_mask],halo_catalog['halo_mvir'], haloprop_bins)
sham_sat_hod, _  = hod_from_mock( sham_catalog['halo_mvir_host_halo'][~cen_mask],halo_catalog['halo_mvir'], haloprop_bins)

In [ ]:
fig = plt.figure(figsize=(12,6) )
for point in downsample_hod_chain:
    params = dict(zip(hod_chain_pnames,point))
    add_logMmin(params, cat)

    cen_hod, sat_hod = calc_hod(cat, params , hbc)
    plt.subplot(121)
    plt.plot(hbc, cen_hod, alpha = 0.2)
    plt.subplot(122)
    plt.plot(hbc, sat_hod, alpha = 0.2)

plt.subplot(121)
plt.plot(hbc, sham_cen_hod, color ='k')#, ls = ':')
plt.ylim([1e-6, 1.2])
plt.loglog();
plt.subplot(122)
plt.plot(hbc, sham_sat_hod, color = 'k')#, ls = '--')
plt.ylim([1e-6, 1e2])
plt.loglog();
plt.show();

In [ ]:
def cen_hod(hbc, sigma_logM, logMmin, full_range = False):
    params = {'sigma_logM': sigma_logM, 'logMmin': logMmin}
    cat.model.param_dict.update(params)
    
    cens_occ = cat.model.model_dictionary['centrals_occupation']
    
    for key, val in params.iteritems():
        if key in cens_occ.param_dict:
            cens_occ.param_dict[key] = val
            
    cen_hod = getattr(cens_occ, "baseline_mean_occupation", cens_occ.mean_occupation)(prim_haloprop=hbc)
    #sat_hod = getattr(sats_occ, "baseline_mean_occupation", sats_occ.mean_occupation)(prim_haloprop=bin_centers)
    if full_range:
        return cen_hod
    
    return np.log10(cen_hod[15:-4])

In [ ]:
def sat_hod(hbc, alpha, logM0, logM1, full_range = False):
    params = {'alpha': alpha, 'logM0': logM0, 'logM1': logM1}
    params.update( {'sigma_logM': t[0], 'logMmin': t[1]})
    cat.model.param_dict.update(params)
    sats_occ = cat.model.model_dictionary['satellites_occupation']
    cens_occ, sats_occ = cat.model.model_dictionary['centrals_occupation'], cat.model.model_dictionary[
        'satellites_occupation']
    for key, val in params.iteritems():
        if key in cens_occ.param_dict:
            cens_occ.param_dict[key] = val
        if key in sats_occ.param_dict:
            sats_occ.param_dict[key] = val
                
    sat_hod = getattr(sats_occ, "baseline_mean_occupation", sats_occ.mean_occupation)(prim_haloprop=hbc)

    if full_range:
        return sat_hod
    return sat_hod[:-4]

In [ ]:
t = curve_fit(cen_hod, hbc, np.log10(sham_cen_hod[15:-4]), p0 = [0.5, 13.5])[0]
print t

In [ ]:
plt.plot(hbc, cen_hod(hbc, *t, full_range=True))
plt.plot(hbc, sham_cen_hod)
plt.ylim([1e-6, 2])
plt.loglog();

In [ ]:
t2 = curve_fit(sat_hod, hbc,sham_sat_hod[:-4], p0 = [0.9, 13.3, 14.5])[0]
print t2

In [ ]:
plt.plot(hbc, sat_hod(hbc, 1.038, 13.0, 14.56, full_range=True))
plt.plot(hbc, sham_sat_hod)
plt.ylim([1e-6, 50])
plt.loglog();

In [ ]:
from pearce.emulator import LemonPepperWet
emus = []
for tmp in tf:
    print tmp
    emus.append(LemonPepperWet(tmp, fixed_params = {'z':0.0}, hyperparams = {}) )

In [ ]:
h = 0.6777
cosmo_pnames = ['ombh2', 'omch2', 'w0', 'ns', 'H0', 'Neff', 'sigma8']
cosmo_true_vals = np.array([0.048206*h**2, 0.307115*h**2,\
                            -1, 0.9667, \
                                h*100, 3.046, 0.8228]) #mdpl2

cat_val_dict = dict(zip(cosmo_pnames, cosmo_true_vals))

In [ ]:
hod_pnames = ['sigma_logM', 'logM0', 'logM1', 'alpha', 'conc_gal_bias',\
              'mean_occupation_centrals_assembias_param1',\
              'mean_occupation_satellites_assembias_param1',
             'mean_occupation_centrals_assembias_slope1',\
              'mean_occupation_satellites_assembias_slope1']
#hod_true_vals = [t[0], 12.5, t2[2], t2[0], 1.0, 0.0, 0.0, 1.0, 1.0]
#hod_true_vals = [0.2, 13.2, 14.5, 0.95, 1.0, 0.0, 0.0, 1.0, 1.0]

In [ ]:
chain.mean(axis = 0)

In [ ]:
hod_true_vals

In [ ]:
true_param_dict = cat_val_dict
#true_param_dict.update(dict(zip(hod_pnames, hod_true_vals)))
true_param_dict.update(zip(chain_pnames, chain.mean(axis=0)))

In [ ]:
emu_wp = emus[0].emulate_wrt_r(true_param_dict).squeeze()

In [ ]:
plt.plot(rbc, (10**emu_wp), label = 'Emu')
plt.plot(rbc, sham_wp, label ='SHAM')
plt.plot(rbc, shuffled_sham_wp, label = 'Shuffled')
plt.plot(rbc, nfw_sham_wp, label = 'NFW')
plt.loglog();
plt.legend(loc='best')

In [ ]:
plt.plot(rbc, (10**emu_wp)/sham_wp, label = 'Emu')
plt.plot(rbc, sham_wp/sham_wp, label ='SHAM')
plt.plot(rbc, shuffled_sham_wp/sham_wp, label = 'Shuffled')
plt.plot(rbc, nfw_sham_wp/sham_wp, label = 'NFW')
plt.xscale('log');
plt.legend(loc='best')

In [ ]:
emu_ds = emus[1].emulate_wrt_r(true_param_dict).squeeze()

In [ ]:
plt.plot(rbc, (10**emu_ds), label = 'Emu')
plt.plot(rbc, sham_ds, label ='SHAM')
plt.plot(rbc, shuffled_sham_ds, label = 'Shuffled')
plt.plot(rbc, nfw_sham_ds, label = 'NFW')
plt.loglog();
plt.legend(loc='best')

In [ ]:
plt.plot(rbc, (10**emu_ds)/sham_ds, label = 'Emu')
plt.plot(rbc, sham_ds/sham_ds, label ='SHAM')
plt.plot(rbc, shuffled_sham_ds/sham_ds, label = 'Shuffled')
plt.plot(rbc, nfw_sham_ds/sham_ds, label = 'NFW')
plt.xscale('log');
plt.legend(loc='best')
def split_hod_plot(HOD, ab_params, sec_haloprop_key='halo_local_density_10', n_splits = 4, cmap_name = 'blue'): cat.load_model(1.0, HOD=HOD, hod_kwargs= {'sec_haloprop_key': sec_haloprop_key}) cat.model.param_dict['logMmin'] = 13.0 cat.model.param_dict['logM0'] = 12.5 cat.populate(ab_params, min_ptcl = 100) print cat.model.param_dict catalog = cat.model.mock.galaxy_table sec_percentiles = compute_conditional_percentiles(prim_haloprop = cat.model.mock.halo_table['halo_mvir'],\ sec_haloprop = cat.model.mock.halo_table[sec_haloprop_key], prim_haloprop_bin_boundaries= mass_bins) sec_gal_percentiles = get_haloprop_of_galaxies(catalog['halo_id'], cat.model.mock.halo_table['halo_id'], sec_percentiles) # TODO bins here hods = np.zeros((n_splits, len(mass_bin_centers))) perc_ranges = np.linspace(0,1, n_splits+1) cmap = sns.color_palette(cmap_name, n_splits) #cmap = sns.dark_palette(cmap_name, n_splits) for i,c in enumerate(cmap): sec_bin_gals = np.logical_and(perc_ranges[i] < sec_gal_percentiles, sec_gal_percentiles

In [ ]: